library(dplyr)
library(haven)
library(tidyr)

############
#Data import
############

data <- readRDS("embodied_interviewing_agents.rds")


#######################
#Sample characteristics
#######################

#age
summary(data$age)

mean(data$age[which(data$group == 1)], na.rm = T) #age across groups
mean(data$age[which(data$group == 2)], na.rm = T)
mean(data$age[which(data$group == 3)], na.rm = T)

summary(aov(age ~ factor(group), data = data)) #one-way analysis of variance (ANOVA)
pairwise.t.test(data$age, data$group, p.adjust.method = "bonferroni") #pairwise comparisons

#gender
prop.table(table(data$female))

prop.table(table(data$female, data$group), 2) #gender across groups
chisq.test(data$female, data$group) #chi-squared test

#education
prop.table(table(data$education))

prop.table(table(data$education, data$group), 2) #education across groups
chisq.test(data$education, data$group) #chi-squared test

#experimental groups
table(data$group)


##########################
#Item-nonresponse analysis
##########################

#first question (women's role in the workplace)
prop.table(table(data$workplace_nresp)) #total item-nonresponse
prop.table(table(data$workplace_cnresp)) #complete item-nonresponse
prop.table(table(data$workplace_pnresp, useNA = "ifany")) #partial item-nonresponse

prop.table(table(data$workplace_nresp, data$group), 2) #total item-nonresponse across groups
chisq.test(data$workplace_nresp, data$group) #chi-squared test

#second question (family relation)
prop.table(table(data$family_relation_nresp)) #total item-nonresponse
prop.table(table(data$family_relation_cnresp)) #complete item-nonresponse
prop.table(table(data$family_relation_pnresp, useNA = "ifany")) #partial item-nonresponse

prop.table(table(data$family_relation_nresp, data$group), 2) #total item-nonresponse across groups
chisq.test(data$family_relation_nresp, data$group) #chi-squared test


###################################
#Preparation of regression analyses
###################################

#create variable indicating complete observations
data$complete = case_when(!is.na(data$age) & !is.na(data$female) & !is.na(data$education) &
                            !is.na(data$interest) & !is.na(data$difficulty) & !is.na(data$personal_feeling) &
                            !is.na(data$satisfaction) ~ 1,
                          TRUE ~ 0)

#recode group variable, so that "text control condition" is used as reference
unique(data$group)
data$group_recoded = case_when(data$group == 3 ~ 1,
                               data$group == 1 ~ 2,
                               data$group == 2 ~ 3)
table(data$group_recoded)
table(data$group, data$group_recoded)


##########################################
#Regression analysis: number of characters
##########################################

#create hierarchical dataset
data_characters <- data %>%
  pivot_longer(c("workplace_characters", "family_relation_characters"), names_to = "question", values_to = "characters")

#recode question variable, so that "women's role in the workplace question" is used as reference
data_characters$question_recoded = case_when(data_characters$question == "workplace_characters" ~ 0,
                                             data_characters$question == "family_relation_characters" ~ 1)
table(data_characters$question_recoded)
table(data_characters$question, data_characters$question_recoded)

#M1
m1_chars <- lm(characters ~ factor(group_recoded), data = data_characters)
summary(m1_chars)

#M2
m2_chars <- lm(characters ~ factor(group_recoded) + factor(question_recoded) +
                 age + factor(female) + factor(education) + interest +
                 difficulty + personal_feeling + satisfaction, data = data_characters)
summary(m2_chars) 

#robustness check: hold observations constant across models
rtest_chars <- lm(characters ~ factor(group_recoded), 
                  data = data_characters[which(data_characters$complete == 1), ])
summary(rtest_chars)


##################################
#Regression analysis: topic number
##################################

#create hierarchical dataset
data_topics <- data %>%
  pivot_longer(c("workplace_topic_number", "family_relation_topic_number"), names_to = "question", values_to = "topics")

#recode question variable, so that "women's role in the workplace question" is used as reference
data_topics$question_recoded = case_when(data_topics$question == "workplace_topic_number" ~ 0,
                                         data_topics$question == "family_relation_topic_number" ~ 1)
table(data_topics$question_recoded)
table(data_topics$question, data_topics$question_recoded)

#M1
m1_topics <- lm(topics ~ factor(group_recoded), data = data_topics)
summary(m1_topics)

#M2
m2_topics <- lm(topics ~ factor(group_recoded) + factor(question_recoded) +
                  age + factor(female) + factor(education) + interest +
                  difficulty + personal_feeling + satisfaction, data = data_topics)
summary(m2_topics)

#robustness check: hold observations constant across models
rtest_topics <- lm(topics ~ factor(group_recoded), 
                   data = data_topics[which(data_topics$complete == 1), ])
summary(rtest_topics)


#####################################
#Regression analysis: sentiment ratio
#####################################

#create hierarchical dataset
data_sentiments <- data %>%
  pivot_longer(c("workplace_sentiment_ratio", "family_relation_sentiment_ratio"), names_to = "question", values_to = "sentiments")

#recode question variable, so that "women's role in the workplace question" is used as reference
data_sentiments$question_recoded = case_when(data_sentiments$question == "workplace_sentiment_ratio" ~ 0,
                                             data_sentiments$question == "family_relation_sentiment_ratio" ~ 1)
table(data_sentiments$question_recoded)
table(data_sentiments$question, data_sentiments$question_recoded)

#M1
m1_sentiments <- lm(sentiments ~ factor(group_recoded), data = data_sentiments)
summary(m1_sentiments)

#M2
m2_sentiments <- lm(sentiments ~ factor(group_recoded) + factor(question_recoded) +
                      age + factor(female) + factor(education) + interest +
                      difficulty + personal_feeling + satisfaction, data = data_sentiments)
summary(m2_sentiments)

#robustness check: hold observations constant across models
rtest_sentiments <- lm(sentiments ~ as.factor(group_recoded), 
                       data = data_sentiments[which(data_sentiments$complete == 1), ])
summary(rtest_sentiments)


#######################################
#SOM3: survey evaluations across groups
#######################################

#interest
mean(data$interest[which(data$group == 1)]) #interest across groups
mean(data$interest[which(data$group == 2)])
mean(data$interest[which(data$group == 3)])

summary(aov(interest ~ factor(group), data = data)) #one-way analysis of variance (ANOVA)
pairwise.t.test(data$interest, data$group, p.adjust.method = "bonferroni") #pairwise comparisons

#difficulty
mean(data$difficulty[which(data$group == 1)], na.rm = T) #difficulty across groups
mean(data$difficulty[which(data$group == 2)], na.rm = T)
mean(data$difficulty[which(data$group == 3)], na.rm = T)

summary(aov(difficulty ~ factor(group), data = data)) #one-way analysis of variance (ANOVA)
pairwise.t.test(data$difficulty, data$group, p.adjust.method = "bonferroni") #pairwise comparisons

#personal feeling
mean(data$personal_feeling[which(data$group == 1)], na.rm = T) #personal feeling across groups
mean(data$personal_feeling[which(data$group == 2)], na.rm = T)
mean(data$personal_feeling[which(data$group == 3)], na.rm = T)

summary(aov(personal_feeling ~ factor(group), data = data)) #one-way analysis of variance (ANOVA)
pairwise.t.test(data$personal_feeling, data$group, p.adjust.method = "bonferroni") #pairwise comparisons

#satisfaction
mean(data$satisfaction[which(data$group == 1)], na.rm = T) #satisfaction across groups
mean(data$satisfaction[which(data$group == 2)], na.rm = T)
mean(data$satisfaction[which(data$group == 3)], na.rm = T)

summary(aov(satisfaction ~ factor(group), data = data)) #one-way analysis of variance (ANOVA)
pairwise.t.test(data$satisfaction, data$group, p.adjust.method = "bonferroni") #pairwise comparisons
